
# import the necessary libraries
import numpy as np
import pandas as pd
import os
from datetime import datetime, timedelta
# Visualisation libraries
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
import pycountry
import plotly.offline as py
import plotly.express as px
from ipywidgets import widgets
from IPython.display import display
!jupyter nbextension enable --py --sys-prefix widgetsnbextension
py.init_notebook_mode(connected=True)
import folium
from folium import plugins
plt.style.use("fivethirtyeight")# for pretty graphs
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
# Increase the default plot size and set the color scheme
plt.rcParams['figure.figsize'] = 8, 5
# Disable warnings
import warnings
warnings.filterwarnings('ignore')
#!pip install pywaffle
from pywaffle import Waffle
from ipywidgets import widgets
from IPython.display import display
!jupyter nbextension enable --py --sys-prefix widgetsnbextension
py.init_notebook_mode(connected=True)
import folium
from folium import plugins
plt.style.use("fivethirtyeight")# for pretty graphs
plt.rcParams['image.cmap'] = 'viridis'
# Load the four CSV inputs used by the worldwide overview.
df_cov = pd.read_csv('covid_19_data.csv')
df_cnf = pd.read_csv('time_series_covid_19_confirmed.csv')
df_rec = pd.read_csv('time_series_covid_19_recovered.csv')
df_death = pd.read_csv('time_series_covid_19_deaths.csv')

# Drop the serial-number column and index the observations by parsed date.
df_cov = df_cov.drop(columns=['SNo'])
df_cov['ObservationDate'] = pd.to_datetime(df_cov['ObservationDate'])
df_cov = df_cov.set_index('ObservationDate')

# Worldwide totals: add up every row that shares an observation date.
df_wrld = (
    df_cov[['Confirmed', 'Deaths', 'Recovered']]
    .groupby(level='ObservationDate')
    .sum()
)
print(df_wrld.head())
# df_wrld.head()
# One trace per metric; deaths and recoveries are bound to their own
# overlaid y-axes (y2 on the right, y3 on the left).
cnf_data = go.Scatter(name="Confirmed", x=df_wrld.index, y=df_wrld.Confirmed)
dea_data = go.Scatter(name="Deaths", x=df_wrld.index, y=df_wrld.Deaths, yaxis='y2')
rec_data = go.Scatter(name="Recovered", x=df_wrld.index, y=df_wrld.Recovered, yaxis='y3')

layout = go.Layout(
    title='COVID-19 progression',
    template="plotly_dark",
    xaxis={'title': 'Date'},
    yaxis={'color': 'blue'},
    yaxis2={'color': 'red', 'overlaying': 'y', 'side': 'right'},
    yaxis3={'color': 'green', 'overlaying': 'y', 'side': 'left'},
)

fig = go.Figure(data=[cnf_data, dea_data, rec_data], layout=layout)
fig.show()
# Every column other than the four identifying ones is a per-date count column.
cnf_period = df_cnf.columns.drop(['Province/State', 'Country/Region', 'Lat', 'Long'])
death_period = df_death.columns.drop(['Province/State', 'Country/Region', 'Lat', 'Long'])
rec_period = df_rec.columns.drop(['Province/State', 'Country/Region', 'Lat', 'Long'])

# Reshape wide -> long (one row per location and date), then discard rows
# that lack either a count or a country.
df_cnf1 = df_cnf.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'],
    value_vars=cnf_period,
    var_name='Date',
    value_name='count',
).dropna(subset=['count', 'Country/Region'])

df_death1 = df_death.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'],
    value_vars=death_period,
    var_name='Date',
    value_name='count',
).dropna(subset=['count', 'Country/Region'])

df_rec1 = df_rec.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'],
    value_vars=rec_period,
    var_name='Date',
    value_name='count',
).dropna(subset=['count', 'Country/Region'])
# Animated world map of confirmed cases: one frame per date column,
# bubble size proportional to the count, one colour per country.
fig = px.scatter_geo(
    df_cnf1,
    lat='Lat',
    lon='Long',
    size='count',
    color='Country/Region',
    hover_name="Country/Region",
    animation_frame="Date",
    projection="natural earth",
    template="plotly_dark",
    title='Patient Confirm Progression ',
)
fig.show()
The pandemic that started in China spread across East Asia by the end of January; thereafter the virus slowly propagated to other countries. By the start of March, West European countries, especially Italy and Spain, were bogged down by its sudden attack. Even the Gulf countries could not stay untouched by Covid-19; Iran was one of the most affected countries. By the end of March, it can be observed that the virus was wreaking havoc in most of the world, chiefly in the US, Italy, Iran and Spain.
# Animated world map of deaths: one frame per date column,
# bubble size proportional to the count, one colour per country.
fig = px.scatter_geo(
    df_death1,
    lat='Lat',
    lon='Long',
    size='count',
    color='Country/Region',
    hover_name="Country/Region",
    animation_frame="Date",
    projection="natural earth",
    template="plotly_dark",
    title='Patient Death Progression ',
)
fig.show()
Until mid-February, hardly any deaths were reported outside Mainland China. By the start of March, the death toll began to rise in East Asia, the Gulf countries, Western Europe (Italy and Spain) and the US. It can be seen that, by the end of March, the virus had caught hold of almost the entire world: the USA and Western Europe had turned into death hotspots. In this phase, a few deaths were also reported from the South American and African continents, and some South Asian countries (India and Pakistan) also fell prey to it.
# Animated world map of recoveries: one frame per date column,
# bubble size proportional to the count, one colour per country.
fig = px.scatter_geo(
    df_rec1,
    lat='Lat',
    lon='Long',
    size='count',
    color='Country/Region',
    hover_name="Country/Region",
    animation_frame="Date",
    projection="natural earth",
    template="plotly_dark",
    title='Patient Recovered progression ',
)
fig.show()
In early February, China began to cope with the situation, and positive outcomes (patient recovery rates) began to surge there. Following China's steps to curb the death rate, precautionary measures and lockdowns were soon implemented in other parts of the world. This facilitated containment of the virus and fostered recovery rates. By the end of March, China had managed to treat its patients successfully, and around 74k patients had recovered from the virus. Positive outcomes were witnessed in the rest of the world too: Italy counted 11k recoveries, while Iran and Spain accounted for 11k and 9.5k recoveries respectively. The superpower USA, however, seemed ill-equipped to cope with the pandemic, as the crisis there continued.
#Loading Clean Dataset
# Load the cleaned case table; 'Date' is parsed to datetime at read time.
cleaned_data = pd.read_csv('covid_19_clean_complete.csv', parse_dates=['Date'])

# Normalise the column names in a single pass. Keys that are not present in
# the file are ignored by rename, so both date spellings can be listed.
cleaned_data = cleaned_data.rename(columns={
    'ObservationDate': 'date',
    'Date': 'date',
    'Province/State': 'state',
    'Country/Region': 'country',
    'Last Update': 'last_updated',
    'Confirmed': 'confirmed',
    'Deaths': 'deaths',
    'Recovered': 'recovered',
})

# cases
cases = ['confirmed', 'deaths', 'recovered', 'active']

# Active Case = confirmed - deaths - recovered
cleaned_data['active'] = (
    cleaned_data['confirmed'] - cleaned_data['deaths'] - cleaned_data['recovered']
)

# replacing Mainland china with just China
cleaned_data['country'] = cleaned_data['country'].replace('Mainland China', 'China')

# filling missing values (done after 'active' is computed, so an active count
# that came out as NaN is also set to 0)
cleaned_data['state'] = cleaned_data['state'].fillna('')
cleaned_data[cases] = cleaned_data[cases].fillna(0)

# NOTE: `data` is the same object as `cleaned_data`, not a copy.
data = cleaned_data
display(data.head())
# DataFrame.info() prints its report and returns None, so it is called
# directly instead of being wrapped in display(), which would echo "None".
data.info()

# Check if the data is updated
print("External Data")
print(f"Earliest Entry: {data['date'].min()}")
print(f"Last Entry: {data['date'].max()}")
print(f"Total Days: {data['date'].max() - data['date'].min()}")
def p2f(x):
    """
    Convert a percentage string such as ``'57%'`` to a fraction (``0.57``).

    Returns ``np.nan`` when *x* is not a string or its content is not numeric.
    """
    try:
        return float(x.strip('%')) / 100
    except (AttributeError, TypeError, ValueError):
        # Non-string input or a non-numeric cell: treat as missing.
        return np.nan
def age2int(x):
    """
    Convert Age to integer.

    Returns ``np.nan`` when *x* cannot be interpreted as an integer.
    """
    try:
        return int(x)
    except (TypeError, ValueError):
        # Placeholder text or a missing value: treat as missing.
        return np.nan
def fert2float(x):
    """
    Convert Fertility Rate to float.

    Returns ``np.nan`` when *x* cannot be interpreted as a number.
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        # Placeholder text or a missing value: treat as missing.
        return np.nan
# Country-level demographics; three columns are parsed through the
# converter helpers defined earlier in this file.
countries_df = pd.read_csv(
    "population_by_country_2020.csv",
    converters={
        'Urban Pop %': p2f,
        'Fert. Rate': fert2float,
        'Med. Age': age2int,
    },
)
countries_df = countries_df.rename(columns={
    'Country (or dependency)': 'country',
    'Population (2020)': 'population',
    'Density (P/Km²)': 'density',
    'Fert. Rate': 'fertility',
    'Med. Age': "age",
    'Urban Pop %': 'urban_percentage',
})
# Use the same country label as the case table.
countries_df['country'] = countries_df['country'].replace({'United States': 'US'})
countries_df = countries_df[
    ["country", "population", "density", "fertility", "age", "urban_percentage"]
]
countries_df.head()

# Merging Covid_19 Data and Countries data (pandas' default inner join on 'country')
data = cleaned_data.merge(countries_df, on='country')
# Loading Temperature Data and performing some Preprocessing
df_temperature = pd.read_csv("temperature_dataframe.csv")
# Align country labels with the ones used in the case data.
df_temperature['country'] = df_temperature['country'].replace(
    {'USA': 'US', 'UK': 'United Kingdom'}
)
df_temperature = (
    df_temperature[
        ["country", "province", "date", "humidity", "sunHour", "tempC", "windspeedKmph"]
    ]
    .reset_index()
    .rename(columns={'province': 'state'})
)
df_temperature["date"] = pd.to_datetime(df_temperature['date'])
df_temperature['state'] = df_temperature['state'].fillna('')
df_temperature.info()

# Merging temperature data on Covid19 Data: only rows that match on
# country, date and state are kept.
data = pd.merge(data, df_temperature, how='inner', on=['country', 'date', 'state'])
data['mortality_rate'] = data['deaths'].div(data['confirmed'])
data.head()
data.describe()
#Data Processing
# Per-day, per-country aggregates. Several columns are selected from a
# groupby with a *list* ([[...]]): the bare-tuple form is rejected by
# pandas 2.x.

# Mean temperature and humidity.
temp_gdf = data.groupby(['date', 'country'])[['tempC', 'humidity']].mean()
temp_gdf = temp_gdf.reset_index()
temp_gdf['date'] = pd.to_datetime(temp_gdf['date']).dt.strftime('%m/%d/%Y')
temp_gdf['tempC_pos'] = temp_gdf['tempC'] - temp_gdf['tempC'].min()  # To use it with size

# Maximum wind speed. The date column is formatted from wind_gdf itself
# rather than borrowed from temp_gdf.
wind_gdf = data.groupby(['date', 'country'])['windspeedKmph'].max()
wind_gdf = wind_gdf.reset_index()
wind_gdf['date'] = pd.to_datetime(wind_gdf['date']).dt.strftime('%m/%d/%Y')

# Summed confirmed cases and deaths.
target_gdf = data.groupby(['date', 'country'])[['confirmed', 'deaths']].sum()
target_gdf = target_gdf.reset_index()
target_gdf['date'] = pd.to_datetime(target_gdf['date']).dt.strftime('%m/%d/%Y')
# Animated map of mean temperature per country. Bubble size uses
# 'tempC_pos' (temperature shifted so its minimum is zero); colour uses the
# raw temperature on a fixed -20..45 scale.
fig = px.scatter_geo(
    temp_gdf.fillna(0),
    locations="country",
    locationmode='country names',
    size='tempC_pos',
    color="tempC",
    range_color=[-20, 45],
    color_continuous_scale="portland",
    hover_name="country",
    animation_frame="date",
    projection="natural earth",
    template="plotly_dark",
    title='Temperature by country',
)
fig.show()
Countries like Canada, Ireland and Russia experienced the lowest temperatures, around -10C to 0C. The Northern Hemisphere countries held an average of 20C throughout the period. Countries near the Equator experienced a significant change in temperature throughout the period; by mid-March, their mean temperature can be taken as 30C. Countries in the Southern Hemisphere had temperatures of around 25C on average.
The second figure is humidity by country. Unlike temperature, there seems to be no clear relation between location and humidity. We can see that humidity is relatively low in China, while it is always high in the European region.
# Animated map of mean humidity per country: humidity drives both bubble
# size and colour (fixed 0..100 scale).
fig = px.scatter_geo(
    temp_gdf.fillna(0),
    locations="country",
    locationmode='country names',
    size='humidity',
    color="humidity",
    range_color=[0, 100],
    color_continuous_scale="portland",
    hover_name="country",
    animation_frame="date",
    projection="natural earth",
    template="plotly_dark",
    title='COVID-19: Humidity by country',
)
fig.show()
# Join case totals with the weather aggregates on (date, country) and add
# the derived columns used by the maps.
gdf = target_gdf.merge(temp_gdf, on=['date', 'country'])
gdf['confirmed_log1p'] = np.log1p(gdf['confirmed'])
gdf['deaths_log1p'] = np.log1p(gdf['deaths'])
gdf['mortality_rate'] = gdf['deaths'].div(gdf['confirmed'])
gdf = gdf.merge(wind_gdf, on=['date', 'country'])

# Bubble size = confirmed cases, colour = temperature.
fig = px.scatter_geo(
    gdf.fillna(0),
    locations="country",
    locationmode='country names',
    size='confirmed',
    color="tempC",
    range_color=[-20, 45],
    color_continuous_scale="portland",
    hover_name="country",
    animation_frame="date",
    projection="natural earth",
    template="plotly_dark",
    title='COVID-19: Confirmed VS Temperature by country',
)
fig.show()
It can be seen that Corona started in China when the temperature was cold, but its spread was not affected much even when the temperature in China increased. The spread in Europe also started at relatively high to medium temperatures (around 20C). Thus, Covid-19's high contagiousness might be the reason behind its wide spread, despite the characteristic weakness of the virus against high temperatures.
In the following visualization, circle size is shown on a log scale, to indicate how the Corona spread affected smaller countries.
# Bubble size = log1p(confirmed), colour = temperature.
fig = px.scatter_geo(
    gdf.fillna(0),
    locations="country",
    locationmode='country names',
    size='confirmed_log1p',
    color="tempC",
    range_color=[-20, 45],
    color_continuous_scale="portland",
    hover_name="country",
    animation_frame="date",
    projection="natural earth",
    template="plotly_dark",
    title='COVID-19: log1p(confirmed) VS Temperature by country',
)
fig.show()
# Bubble size = deaths, colour = temperature.
fig = px.scatter_geo(
    gdf.fillna(0),
    locations="country",
    locationmode='country names',
    size='deaths',
    color="tempC",
    range_color=[-20, 45],
    color_continuous_scale="portland",
    hover_name="country",
    animation_frame="date",
    projection="natural earth",
    template="plotly_dark",
    title='COVID-19: deaths VS temperature by country',
)
fig.show()
The number of deaths was high in China, Europe, the US and Iran. Even though these are northern, i.e. cooler, regions, this might be because of high population density. In the USA, most of the deaths occurred in New York (one of the most crowded states of the USA).
The mortality rate, instead of the total number of deaths, can be checked to see whether the weather affects how severe Coronavirus becomes.
# Bubble size = mortality rate (deaths / confirmed), colour = temperature.
fig = px.scatter_geo(
    gdf.fillna(0),
    locations="country",
    locationmode='country names',
    size='mortality_rate',
    color="tempC",
    range_color=[-20, 45],
    color_continuous_scale="portland",
    hover_name="country",
    animation_frame="date",
    projection="natural earth",
    template="plotly_dark",
    title='COVID-19: Mortality rate VS Temperature by country',
)
fig.show()
We see that the mortality rate was not strongly related to region or temperature. The mortality rate was observed to be high at the beginning of the spread in each country (maybe because the total number of tests was low), but many countries seemed to be converging to a mortality rate of around 3%.
# Bubble size = log1p(confirmed), colour = humidity.
fig = px.scatter_geo(
    gdf.fillna(0),
    locations="country",
    locationmode='country names',
    size='confirmed_log1p',
    color="humidity",
    range_color=[0, 100],
    color_continuous_scale="portland",
    hover_name="country",
    animation_frame="date",
    projection="natural earth",
    template="plotly_dark",
    title='COVID-19: log1p(confirmed) VS Humidity by country',
)
fig.show()
Corona spread was seen not only in China, where humidity was low, but also in Europe, where humidity was high. Thus, humidity did not seem to affect the propagation of Covid-19 in any way.
# Bubble size = mortality rate (deaths / confirmed), colour = humidity.
fig = px.scatter_geo(
    gdf.fillna(0),
    locations="country",
    locationmode='country names',
    size='mortality_rate',
    color="humidity",
    range_color=[0, 100],
    color_continuous_scale="portland",
    hover_name="country",
    animation_frame="date",
    projection="natural earth",
    template="plotly_dark",
    title='COVID-19: Mortality rate VS humidity by country',
)
fig.show()
No conclusive evidence could be found to establish any correlation between humidity and the mortality rate of Covid-19.
Next, we visualize the relationship between wind speed and the spread of Corona.
# Bubble size = log1p(confirmed), colour = maximum wind speed (km/h).
fig = px.scatter_geo(
    gdf.fillna(0),
    locations="country",
    locationmode='country names',
    size='confirmed_log1p',
    color="windspeedKmph",
    range_color=[0, 40],
    color_continuous_scale="portland",
    hover_name="country",
    animation_frame="date",
    projection="natural earth",
    template="plotly_dark",
    title='COVID-19: log1p(Confirmed) VS Wind speed by country',
)
fig.show()
Could the relatively high wind speed be a reason for the wide spread of Covid-19 in the European region in the short term?
From the data analysis, it can be concluded that weather changes hardly affected Corona's wide spread.
# Rebuild `data` from the cleaned case table plus demographics; this drops
# the weather columns merged in earlier.
data = pd.merge(cleaned_data, countries_df, on='country')

# latest: rows for the most recent date only
full_latest = data[data['date'] == data['date'].max()].reset_index()
# China is taken from the latest snapshot too. Filtering the full history
# here would make the per-state sums below add up the counts of every date.
china_latest = full_latest[full_latest['country'] == 'China']
row_latest = full_latest[full_latest['country'] != 'China']

# latest condensed
# Columns are selected with a list ([[...]]); the bare-tuple form is
# rejected by pandas 2.x.
full_latest_grouped = (
    full_latest.groupby('country')[['confirmed', 'deaths', 'recovered', 'active']]
    .sum()
    .reset_index()
)
china_latest_grouped = (
    china_latest.groupby('state')[['confirmed', 'deaths', 'recovered', 'active']]
    .sum()
    .reset_index()
)
row_latest_grouped = (
    row_latest.groupby('country')[['confirmed', 'deaths', 'recovered', 'active']]
    .sum()
    .reset_index()
)

# Worldwide totals on the most recent date. (An earlier per-state max()
# assigned to `temp` was overwritten immediately and has been removed.)
temp = data.groupby('date')[['confirmed', 'deaths', 'recovered', 'active']].sum().reset_index()
temp = temp[temp['date'] == temp['date'].max()].reset_index(drop=True)
temp.style.background_gradient(cmap='Pastel1')
# Analysing Corona Cases Country-wise:
# rank countries by confirmed cases, highest first, with a fresh 0..n-1 index.
temp_f = (
    full_latest_grouped
    .sort_values(by='confirmed', ascending=False)
    .reset_index(drop=True)
)
temp_f.style.background_gradient(cmap='Reds')
USA Stats:================Italy Stats:===============China Stats:===============Spain Stats:==============Germany Stats:
Recovery Rate: 0.85%======Recovery Rate: 12.66%======Recovery Rate: 91.23%======Recovery Rate: 14.23%=====Recovery Rate: 13.08%
Death Rate: 1.50%=========Death Rate: 10.56%=========Death Rate: 4.02%==========Death Rate: 7.80%=========Death Rate: 0.67%
Active Cases: 97.00%======Active Cases: 76.78%=======Active Cases: 4.74%========Active Cases: 78.94%======Active Cases: 86.23%
# Confirmed total per country, computed in one grouped pass rather than
# re-filtering the table once per country. groupby sorts its keys, so
# `countries` comes out in sorted order as before.
# (`mean_conf` holds sums, not means; the name is kept for compatibility.)
mean_conf = temp_f.groupby('country')['confirmed'].sum()
countries = mean_conf.index.values
mean_conf = mean_conf.tolist()

# Building the dataframe
# NOTE: this rebinds `data`, which earlier held the merged DataFrame.
data = [{
    'type': 'choropleth',
    'locations': countries,
    'z': mean_conf,
    'locationmode': 'country names',
    'text': countries,
    'marker': {'line': {'color': 'rgb(0,0,0)', 'width': 1}},
    # NOTE(review): 'autotick' is presumably why validate=False is passed
    # below - confirm against the installed plotly version.
    'colorbar': {'autotick': True, 'tickprefix': '', 'title': 'Count'},
}]

# Building the visual
layout = {
    'title': 'COVID-19 Confirmed Cases',
    'geo': {
        'showframe': False,
        'showocean': True,
        'oceancolor': 'rgb(0,255,255)',
        'projection': {
            'type': 'orthographic',
            'rotation': {'lon': 60, 'lat': 10},
        },
        'lonaxis': {'showgrid': True, 'gridcolor': 'rgb(102, 102, 102)'},
        'lataxis': {'showgrid': True, 'gridcolor': 'rgb(102, 102, 102)'},
    },
}

fig = {'data': data, 'layout': layout}
py.iplot(fig, validate=False, filename='worldmap')
covid_df = pd.read_csv('covid_19_india.csv')
covid_df.head()

#Importing necessary libraries and loading related data files:
import geopandas as gpd
import seaborn as sns
sns.set_style('dark')

# State boundaries; row 0's state name is overwritten with the spelling
# given here.
map_df = gpd.read_file('Indian_States.shp')
map_df.loc[0, 'st_nm'] = 'Andaman and Nicobar Islands'
map_df.head()

# India-specific inputs.
df_india = pd.read_csv('covid_19_india.csv')
df_ind_bed = pd.read_csv('HospitalBedsIndia.csv')
df_ind_ICMR = pd.read_csv('ICMRTestingDetails.csv')
df_ind_indiv = pd.read_csv('IndividualDetails.csv')
df_ind_census = pd.read_csv('population_india_census2011.csv')

# Total confirmed = Indian nationals + foreign nationals.
df_india['Confirmed'] = (
    df_india['ConfirmedIndianNational'] + df_india['ConfirmedForeignNational']
)

# Sum the remaining columns per state and attach them to the map geometry
# (joined on the state name).
df_forMap = (
    df_india
    .drop(columns=['Date', 'Sno'])
    .groupby('State/UnionTerritory')
    .sum()
)
merged = map_df.set_index('st_nm').join(df_forMap)
#merged.fillna(0)
import matplotlib.pyplot as plt
%matplotlib inline
!pip install descartes
fig, ax = plt.subplots(5, figsize=(9, 45))
topic = ['Confirmed','ConfirmedIndianNational','ConfirmedForeignNational','Cured','Deaths']
cmaps = ['Oranges','Blues', 'Purples', 'Greens', 'Reds']
for i,l in enumerate(topic):
ax[i].axis('off')
ax[i].set_title('{} Cases of COVID 19 in India'.format(l), fontdict={'fontsize': '20', 'fontweight' : '5'})
merged.plot(column=l, cmap=cmaps[i], linewidth=0.8, ax=ax[i], edgecolor='0.75', legend=True)
Highlights from the above charts: Confirmed Cases: Maharashtra and Kerala topped the list, followed by Uttar Pradesh, Rajasthan, Karnataka, Delhi, Gujarat and other states.
Foreign Nationals: Among the confirmed cases, a few were foreign nationals too. Confirmed cases among foreign nationals came mainly from Haryana, Rajasthan, Maharashtra and Kerala.
Deaths: The majority of reported deaths belonged to Maharashtra, Karnataka, Gujarat and Haryana.
Cured: Some positive news of recoveries came from Uttar Pradesh, Kerala, Rajasthan and Haryana.
#Data Processing
#Plotting Daily Rise in Cases assorted by Confirmed, Cured and Deaths
import plotly.graph_objs as go

# Parse the day-first date strings before grouping. Grouping on the raw
# strings sorts them lexicographically (e.g. '01/03/20' before '30/01/20'),
# which scrambles the time axis. df_india itself is left untouched because
# later code still compares its 'Date' column against strings.
df_datechart = (
    df_india
    .drop(columns=['State/UnionTerritory', 'Sno'])
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], dayfirst=True))
    .groupby('Date')
    .sum()
)

cnf_data = go.Bar(x=df_datechart.index,
                  y=df_datechart.Confirmed, hovertext='Confirmed', name="Confirmed")
dea_data = go.Bar(x=df_datechart.index,
                  y=df_datechart.Deaths, hovertext='Deaths',
                  yaxis='y2', name="Deaths")
# NOTE(review): the layout configures yaxis3 for "Cured", but this trace is
# bound to y2 (shared with Deaths) - confirm which axis is intended.
rec_data = go.Bar(x=df_datechart.index,
                  y=df_datechart.Cured, hovertext='Cured',
                  yaxis='y2', name="Cured")

layout = go.Layout(title='COVID-19 progression in India', xaxis=dict(title='Date'),
                   yaxis=dict(title='Confirmed', color='blue'),
                   yaxis2=dict(title='Death', color='red',
                               overlaying='y', side='right'),
                   yaxis3=dict(title=' Cured', color='green',
                               overlaying='y'),
                   template="plotly_dark")
fig = go.Figure(data=[cnf_data, dea_data, rec_data], layout=layout)
fig.update_traces(marker_line_width=1.5, opacity=0.7)
fig.show()
The Corona Virus case figures began to surge from the first week of March, and a continuous rise was witnessed with each passing day. On average, 15 new cases were recorded daily until the 2nd week of March. Exponential growth was seen thereafter, and the death toll increased.
Confirmed Cases: 873
Cured/Recoverd/Migrated: 79
Deaths: 19
df_ind = df_india.groupby(['State/UnionTerritory', "Date"]).head()
States = np.unique(df_ind['State/UnionTerritory'].values)
States
top5aff_states = (
    df_ind.groupby(['State/UnionTerritory'])
    .max()
    .sort_values(['ConfirmedIndianNational'], ascending=False)[:5]
    .index.values
)

# Kerala's dates serve as the reference calendar: every other state is
# padded with zero rows for the reference dates before its first report.
dates = df_ind[df_ind['State/UnionTerritory'] == 'Kerala']['Date'].values
for state in States:
    df1 = df_ind[df_ind['State/UnionTerritory'] == state]
    # Position of this state's first reported date in the reference calendar.
    # Raises IndexError if that date does not occur in Kerala's dates.
    rec_date_idx = np.where(dates == df1['Date'].values[0])[0][0]
    if rec_date_idx > 0:
        df2 = pd.DataFrame({
            'Date': dates[:rec_date_idx],
            'ConfirmedIndianNational': np.zeros(rec_date_idx),
            'ConfirmedForeignNational': np.zeros(rec_date_idx),
            'Cured': np.zeros(rec_date_idx),
            'Deaths': np.zeros(rec_date_idx),
            'State/UnionTerritory': state,
        })
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported equivalent.
        df2 = pd.concat([df2, df1], ignore_index=True)
    else:
        df2 = df1
    # One CSV per state, written to the working directory.
    df2.to_csv(state + '.csv', index=False)
# One line per state, read back from the per-state CSV files; the first
# 30 rows of each file are skipped.
plt.figure(figsize=(10, 10))
for state in States:
    df1 = pd.read_csv(f'{state}.csv').iloc[30:]
    plt.plot(df1['Date'], df1['ConfirmedIndianNational'], "*-", label=state)
plt.xticks(rotation=90)
plt.legend()
plt.savefig('indian_states.png')
Maharashtra and Kerala are the worst affected states.
From 14th March to 28th March, Maharashtra recorded approximately 170 new cases. A steep rise in the daily count was noticed in this period.
For Kerala, the Covid-19 impact was seen from 20th March. Within a week, approximately 150 new cases were recorded.
No cases were recorded in Tamil Nadu from 25th March to 28th March.
# Working copy of the India table without the serial number, indexed from 1.
covid_df = df_india.drop(columns=['Sno'])
covid_df.index = range(1, covid_df.shape[0] + 1)

# State-level table with friendlier column names.
covid_india = covid_df.copy()
covid_india['Total Confirmed cases'] = (
    covid_india['ConfirmedIndianNational'] + covid_india['ConfirmedForeignNational']
)
covid_india['Total Active cases'] = (
    covid_india['ConfirmedIndianNational'] + covid_india['ConfirmedForeignNational']
    - covid_india['Cured'] - covid_india['Deaths']
)
covid_india = covid_india.rename(columns={
    "State/UnionTerritory": "States",
    "ConfirmedIndianNational": "Confirmed cases (Indian Nationals)",
    "ConfirmedForeignNational": "Confirmed cases (Foreign Nationals)",
    "Cured": "Cured/Discharged/Migrated",
})
# Drop rows filed under these state labels.
covid_india = covid_india[~covid_india['States'].isin([
    'Chattisgarh',
    'Pondicherry',
    'Union Territory of Jammu and Kashmir',
    'Union Territory of Chandigarh',
    'Union Territory of Ladakh',
])]
covid_india.index = range(1, covid_india.shape[0] + 1)
indian_states = covid_india.copy()
covid_india['Date'] = pd.to_datetime(covid_india['Date'], dayfirst=True)
covid_india = covid_india.sort_values(by='Date')

# Date-level table (original column names kept).
covid_ind = covid_df.copy()
covid_ind['Total Confirmed cases'] = (
    covid_ind['ConfirmedIndianNational'] + covid_ind['ConfirmedForeignNational']
)
covid_ind['Total Active cases'] = (
    covid_ind['ConfirmedIndianNational'] + covid_ind['ConfirmedForeignNational']
    - covid_ind['Cured'] - covid_ind['Deaths']
)
# Explicit copy so the assignment below writes to an independent frame, and
# one vectorized parse (as for covid_india above) instead of calling
# pd.to_datetime once per row.
date_wise_data = covid_ind[["Date", "Total Confirmed cases", "Deaths", "Cured"]].copy()
date_wise_data['Date'] = pd.to_datetime(date_wise_data['Date'], dayfirst=True)
date_wise_data
from IPython.display import Markdown

# Collapse the per-state rows into one summed row per date.
date_wise_data = date_wise_data.groupby("Date").sum().reset_index()


def formatted_text(string):
    """Render *string* as Markdown in the notebook output."""
    display(Markdown(string))


formatted_text('***Date wise data***')
date_wise_data
#Time- Bound Cases Visualization:
import plotly.offline as py
import plotly.express as px

# Long format: one row per (date, case type), as px.area expects for `color`.
temp = date_wise_data.melt(
    id_vars="Date",
    value_vars=['Cured', 'Deaths', 'Total Confirmed cases'],
    var_name='Case',
    value_name='Count',
)
fig = px.area(
    temp,
    x="Date",
    y="Count",
    color='Case',
    title='Time wise cases analysis',
    color_discrete_sequence=['#21bf73', '#ff2e63', '#fe9801'],
)
fig.show()
A significant rise in new cases was seen from 1st March 2020.
In March, the count soared from 3 to 873 by 28th March.
The majority of cases were still active at that point. 19 deaths were reported and 79 patients had recovered from Covid-19 by 28th March.
#Tree Map:
# Per-state maximum of each counter. Columns are selected with a list
# ([[...]]); the bare-tuple form is rejected by pandas 2.x. reset_index()
# already returns a DataFrame, so no extra wrapper is needed.
statewise_cases = (
    covid_ind.groupby(['State/UnionTerritory'])[['Total Confirmed cases', 'Deaths', 'Cured']]
    .max()
    .reset_index()
)
#statewise_cases["Country"] = "India" # in order to have a single root node
fig = px.treemap(statewise_cases, path=['State/UnionTerritory'], values='Total Confirmed cases',
                 color='Total Confirmed cases', hover_data=['State/UnionTerritory'],
                 color_continuous_scale='RdBu')
fig.show()
#Data Processing
covid_ind.head()

# Total and active counts per row.
covid_ind['Total Cases'] = (
    covid_ind['ConfirmedIndianNational'] + covid_ind['ConfirmedForeignNational']
)
covid_ind['Active Cases'] = (
    covid_ind['Total Cases'] - covid_ind['Cured'] - covid_ind['Deaths']
)

# Renaming Column Names
covid_ind = covid_ind.rename(columns={'Cured': 'Cured/Discharged/Migrated'})

# Snapshot of the rows whose Date string is 28/03/20.
temp_df = covid_ind.loc[covid_ind['Date'] == '28/03/20']

# Statewise Total Cases: every other counter is part of the group key, so
# those columns are carried through as-is while 'Total Cases' is summed.
df_statewise = (
    temp_df
    .groupby([
        'State/UnionTerritory',
        'ConfirmedIndianNational',
        'ConfirmedForeignNational',
        'Cured/Discharged/Migrated',
        'Deaths',
        'Active Cases',
    ])['Total Cases']
    .sum()
    .reset_index()
)
df_statewise
# Styler helper that highlights the maximum value(s) of a column.
def highlight_max_count(count, color='#1f77b4'):
    """Return one CSS string per element of *count*, highlighting the maximum.

    Parameters
    ----------
    count : pandas.Series
        Values to inspect, e.g. a column handed over by ``Styler.apply``.
    color : str, optional
        Background colour applied to the maximum value(s).

    Returns
    -------
    list of str
        ``'background-color: <color>'`` for every element equal to the
        maximum and ``''`` for all others.
    """
    is_max = count == count.max()
    return [f'background-color: {color}' if flag else '' for flag in is_max]
# Distribution of Cases in India: case counts shaded blue, recoveries green,
# deaths red.
(
    df_statewise.style
    .background_gradient(
        cmap="Blues",
        subset=['ConfirmedIndianNational', 'ConfirmedForeignNational', 'Total Cases', 'Active Cases'],
    )
    .background_gradient(cmap="Greens", subset=['Cured/Discharged/Migrated'])
    .background_gradient(cmap="Reds", subset=['Deaths'])
)

# Statewise active cases, largest first.
x = (
    df_statewise
    .groupby('State/UnionTerritory')['Active Cases']
    .sum()
    .sort_values(ascending=False)
    .to_frame()
)
x.style.background_gradient(cmap='Reds')
#covid_ind.drop('ConfirmedIndianNational',axis = 1,inplace=True)
#covid_ind.drop('ConfirmedForeignNational',axis = 1,inplace=True)
#covid_ind
# State table ranked by total cases, with a mortality-rate column.
temp = df_statewise.drop(columns=['ConfirmedForeignNational'])
temp = temp.sort_values(by='Total Cases', ascending=False)
temp = temp[['State/UnionTerritory', 'Total Cases', 'Active Cases', 'Deaths', 'Cured/Discharged/Migrated']]
# Deaths as a percentage of total cases, rounded to two decimals.
temp['Mortality Rate'] = ((temp['Deaths'] / temp['Total Cases']) * 100).round(2)
temp = temp.reset_index(drop=True)
temp.head(10)
# The column is called 'Active Cases'; the previous subset named 'Active',
# which is not a column of this table.
temp.style.background_gradient(cmap="Reds", subset=['Total Cases', 'Active Cases'])\
    .background_gradient(cmap="Greens", subset=['Cured/Discharged/Migrated'])\
    .background_gradient(cmap="Oranges_r", subset=['Deaths'])\
    .background_gradient(cmap="seismic_r", subset=['Mortality Rate'])
%%HTML
<div class='tableauPlaceholder' id='viz1585145553118' style='position: relative'>
<noscript>
<a href='#'>
<img alt=' ' src='https://public.tableau.com/static/images/Bo/Book1_31496/Dashboard3/1_rss.png' style='border: none' />
</a>
</noscript>
<object class='tableauViz' style='display:none;'>
<param name='host_url' value='https%3A%2F%2Fpublic.tableau.com%2F' />
<param name='embed_code_version' value='3' />
<param name='site_root' value='' />
<param name='name' value='Book1_31496/Dashboard3' />
<param name='tabs' value='no' />
<param name='toolbar' value='yes' />
<param name='static_image' value='https://public.tableau.com/static/images/Bo/Book1_31496/Dashboard3/1.png' />
<param name='animate_transition' value='yes' />
<param name='display_static_image' value='yes' />
<param name='display_spinner' value='yes' />
<param name='display_overlay' value='yes' />
<param name='display_count' value='yes' />
<param name='filter' value='publish=yes' />
</object>
</div>
<script type='text/javascript'>
var divElement = document.getElementById('viz1585145553118');
var vizElement = divElement.getElementsByTagName('object')[0];
if ( divElement.offsetWidth > 800 )
{
vizElement.style.minWidth='420px';
vizElement.style.maxWidth='650px';
vizElement.style.width='100%';
vizElement.style.minHeight='587px';
vizElement.style.maxHeight='887px';
vizElement.style.height=(divElement.offsetWidth*0.75)+'px';
}
else if ( divElement.offsetWidth > 500 )
{
vizElement.style.minWidth='420px';
vizElement.style.maxWidth='650px';
vizElement.style.width='100%';
vizElement.style.minHeight='587px';
vizElement.style.maxHeight='887px';
vizElement.style.height=(divElement.offsetWidth*0.75)+'px';
}
else
{
vizElement.style.width='100%';
vizElement.style.height='727px';
}
var scriptElement = document.createElement('script');
scriptElement.src = 'https://public.tableau.com/javascripts/api/viz_v1.js';
vizElement.parentNode.insertBefore(scriptElement, vizElement);
</script>
Maharashtra and Kerala reported the maximum number of cases; no cases were reported in North East India except in a few parts such as Manipur and Mizoram (1 each).
This visualization shows the spread of the virus in India and the cluster formations. Two clusters (dark shades) were quite pronounced: Kerala and Maharashtra, with 186 and 182 confirmed cases respectively, and 6 and 1 deaths respectively. Kerala was the first affected place in India. Small clusters were forming in North India. Given the population density of India, this could prove harmful and could lead to a massive single cluster if people abstain from good practices such as self-quarantine and sanitization. The clusters in the rest of India were sparsely situated (light shades). Hence, proper caution would gradually lead to the death of those clusters.
#Overall
#df_india = pd.read_csv('covid_19_india.csv')
#cov_ind = df_india.copy()
#cov_ind['Confirmed'] = cov_ind['ConfirmedIndianNational'] + cov_ind['ConfirmedForeignNational']
#cov_ind['Active'] = cov_ind['ConfirmedIndianNational'] + cov_ind['ConfirmedForeignNational'] - cov_ind['Cured'] - cov_ind['Deaths']
# National totals from the state-level table currently bound to `temp`
# (presumably the ranked mortality table built just before - verify if cells
# are run out of order).
a_c = temp['Active Cases'].sum()
r_d = temp['Cured/Discharged/Migrated'].sum()
d_h = temp['Deaths'].sum()

fig = go.Figure(data=[go.Pie(labels=['Active Cases', 'Cured', 'Death'],
                             values=[a_c, r_d, d_h], hole=.3)])
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=20,
                  marker=dict(colors=['#263fa3', '#2fcc41', '#cc3c2f'],
                              line=dict(color='#FFFFFF', width=2)))
# RGB components must lie in 0..255; the previous 'rgb(275, 270, 273)' was
# out of range, so the intended white is spelled out explicitly.
fig.update_layout(title_text='Current Situation in India', plot_bgcolor='rgb(255, 255, 255)')
fig.show()
Active Cases: 775(88.8%)
Cured/Recovered/Migrated: 79(9.05%)
Deaths: 19(2.18%)
#Reading Hospital Data
hospdata = pd.read_csv("HospitalBedsIndia.csv")
#Cleaning Hospital Data:
# Remove the two unnamed columns and replace the source column codes with readable names.
hospdata = hospdata.drop(['Unnamed: 12', 'Unnamed: 13'], axis=1)
hospdata.rename(
    columns={
        'NumPrimaryHealthCenters_HMIS': 'Primary Health Center',
        'NumCommunityHealthCenters_HMIS': 'Community Health Center',
        'NumSubDistrictHospitals_HMIS': 'Sub District Hospital',
        'NumDistrictHospitals_HMIS': 'District Hospitals',
        'TotalPublicHealthFacilities_HMIS': 'Total Public Health Facility',
        'NumPublicBeds_HMIS': 'Public Beds',
        'NumRuralHospitals_NHP18': 'Rural Hospitals',
        'NumRuralBeds_NHP18': 'Rural Hosp Beds',
        'NumUrbanHospitals_NHP18': 'Urban Hospitals',
        'NumUrbanBeds_NHP18': 'Urban Hosp Beds',
    },
    inplace=True,
)
# hospdata1 is the frame used by the per-state charts: rows labelled 36 and 37 are removed.
# NOTE(review): presumably those are summary/footer rows rather than states - confirm against the CSV.
hospdata1 = hospdata.drop([36, 37], axis=0)
# A single ascending sort gives the bar order; the earlier descending sort was
# immediately overridden and only added work.
urban_sorted = hospdata1.sort_values('Urban Hospitals', ascending=True)
fig = px.bar(
    urban_sorted,
    x="Urban Hospitals",
    y="State/UT",
    title='Total Urban Health Centres',
    text='Urban Hospitals',
    orientation='h',
    width=1000,
    height=700,
    # Series.max() skips missing values, unlike the builtin max().
    range_x=[0, hospdata1['Urban Hospitals'].max()],
)
fig.update_traces(marker_color='#46cdcf', opacity=0.8, textposition='inside')
fig.update_layout(plot_bgcolor='rgb(250, 242, 242)')
fig.show()
Urban Hospitals Count:
Tamil Nadu: 525
Maharashtra: 438
Karnataka: 374
Analysing the area of each state, its population and the progression rate of Covid-19: will India be able to restrict the spread of Covid-19 given its medical constraints?
# Horizontal bar chart of rural hospital counts per state/UT.
# A single ascending sort gives the bar order; the earlier descending sort was
# immediately overridden and only added work.
rural_sorted = hospdata1.sort_values('Rural Hospitals', ascending=True)
fig = px.bar(
    rural_sorted,
    x="Rural Hospitals",
    y="State/UT",
    title='Total Rural Health Centers',
    text='Rural Hospitals',
    orientation='h',
    width=1000,
    height=700,
    # Series.max() skips missing values, unlike the builtin max().
    range_x=[0, hospdata1['Rural Hospitals'].max()],
)
fig.update_traces(marker_color='#46cdcf', opacity=0.8, textposition='inside')
fig.update_layout(plot_bgcolor='rgb(230, 242, 242)')
fig.show()
# Overlaid horizontal bars of urban (red) and rural (blue) hospital beds per state/UT.
sns.set_style("white")
sns.set_context({"figure.figsize": (24, 24)})
# Both layers read from hospdata1 so that the bed counts and the state labels
# come from the same set of rows (the urban layer previously took x from hospdata,
# which still contains the rows that hospdata1 drops).
sns.barplot(x=hospdata1['Urban Hosp Beds'], y=hospdata1['State/UT'], color="red")
bottom_plot = sns.barplot(x=hospdata1['Rural Hosp Beds'], y=hospdata1['State/UT'], color="#0000A3")
# Proxy rectangles: the legend is built by hand from two colour swatches.
topbar = plt.Rectangle((0, 0), 1, 1, fc="red", edgecolor='none')
bottombar = plt.Rectangle((0, 0), 1, 1, fc='#0000A3', edgecolor='none')
l = plt.legend([bottombar, topbar], ['Rural Hosp Beds', 'Urban Hosp Beds'], loc=1, ncol=2, prop={'size': 16})
l.draw_frame(False)
sns.despine(left=True)
bottom_plot.set_ylabel("States")
bottom_plot.set_xlabel("Hospital Beds")
# Apply one font size to both axis titles and to every tick label.
for item in ([bottom_plot.xaxis.label, bottom_plot.yaxis.label] +
             bottom_plot.get_xticklabels() + bottom_plot.get_yticklabels()):
    item.set_fontsize(16)
West Bengal had the most hospital beds: 20000 rural and 40000 urban. With those medical facilities it could fight the virus well.
Karnataka's statistics revealed that it had a total capacity of 50000 hospital beds, of which approximately 21000 were rural beds.
Rajasthan, Madhya Pradesh, Maharashtra and Uttar Pradesh, though bigger states w.r.t. area and population, had limited bed capacity. Those states needed to take immediate steps to tackle the Covid-19 pandemic.
# Horizontal bars of primary health centre counts per state/UT; bar colour follows the same count.
fig = px.bar(
    hospdata1,
    x="Primary Health Center",
    y="State/UT",
    color='Primary Health Center',
    orientation='h',
    height=800,
    title='Primary Health Centre',
    color_discrete_sequence=px.colors.cyclical.mygbm,
)
fig.update_layout(plot_bgcolor='rgb(250, 242, 242)')
fig.show()
Primary Health Centres are meant to provide primary medical treatment to patients. Uttar Pradesh had most Primary Health Centres(3277) Other states need to rise to the cause too.
# Horizontal bars of total public health facilities per state/UT; bar colour follows the same count.
fig = px.bar(
    hospdata1,
    x="Total Public Health Facility",
    y="State/UT",
    color='Total Public Health Facility',
    orientation='h',
    height=800,
    title='Total Health Facility in India',
    color_discrete_sequence=px.colors.cyclical.mygbm,
)
fig.update_layout(plot_bgcolor='rgb(250, 242, 242)')
fig.show()
# Scatter of facilities against public beds, one colour per state/UT,
# with a rug on the y margin and a histogram on the x margin.
fig = px.scatter(
    hospdata1,
    x="Total Public Health Facility",
    y="Public Beds",
    color="State/UT",
    marginal_y="rug",
    marginal_x="histogram",
)
fig
# Same layout for sub-district hospitals against district hospitals.
fig = px.scatter(
    hospdata1,
    x="Sub District Hospital",
    y="District Hospitals",
    color="State/UT",
    marginal_y="rug",
    marginal_x="histogram",
)
fig
Overall Public Health Facilities are highest in Uttar Pradesh followed by Maharashtra and Karnataka. Urban Health Centers are highest in Tamil Nadu. Rural Health Centers are highest in UP.
#Loading Data:
df_hos_bed = pd.read_csv('ICMRTestingDetails.csv')
# Parse the timestamps, then keep only the calendar date so rows can be compared per day.
df_hos_bed['DateTime'] = pd.to_datetime(df_hos_bed['DateTime'])
df_hos_bed['DateTime'] = df_hos_bed['DateTime'].dt.date
df_hos_bed.head()
#Data Cleaning and Processing:
df_hos_bed['totalnegative'] = df_hos_bed['TotalIndividualsTested'] - df_hos_bed['TotalPositiveCases']
# Keep the last row reported for each date. The explicit .copy() makes the per-day
# frame independent of df_hos_bed, so adding a column below is an unambiguous write
# (no SettingWithCopy warning on the filtered result).
df_hos_bed_per_day = df_hos_bed.drop_duplicates(subset=['DateTime'], keep='last').copy()
df_hos_bed_per_day['test_results_posratio'] = round(
    df_hos_bed_per_day['TotalPositiveCases'] / df_hos_bed_per_day['TotalIndividualsTested'], 3)
df_hos_bed_per_day.head()
colors = ['#269A06', '#AF0E06']
# Percentages are computed over every row of df_hos_bed (not the per-day frame).
# NOTE(review): if the tested/positive columns are cumulative counts, summing them
# across rows over-weights later reports - confirm the column semantics.
tested_total = df_hos_bed['TotalIndividualsTested'].sum()
negative = round(df_hos_bed['totalnegative'].sum() / tested_total * 100, 2)
positive = round(df_hos_bed['TotalPositiveCases'].sum() / tested_total * 100, 2)
fig = go.Figure(data=[go.Pie(labels=['People who tested Negative', 'People who tested Positive'],
                             values=[negative, positive], hole=.5)])
fig.update_traces(title_text='COVID19 Test Results', hoverinfo='label+percent', textinfo='value', textfont_size=15,
                  marker=dict(colors=colors, line=dict(color='#FFFFFF', width=2)))
fig.show()
The phase when the testings were done on suspects, majority of suspects were either citizens with recent travel history or the relatives and acquaintances of such citizens. Testings were deliberately and randomly carried at Airports majorly. Hence, the ratio of negative tested was found so high.
# Line chart of the daily positive-per-test ratio.
fig1 = go.Figure()
ratio_trace = go.Scatter(
    x=df_hos_bed_per_day['DateTime'],
    y=df_hos_bed_per_day['test_results_posratio'],
    name='Confirmed Cases',
    marker=dict(color='#D32210'),
)
fig1.add_trace(ratio_trace)
# Title, size and white text, followed by the same teal colour for plot area and surrounding paper.
fig1.update_layout(
    title_text='COVID-19 Positive Detection per Test Ratio in India w.r.t. Time',
    xaxis_showgrid=False,
    width=700,
    height=500,
    font=dict(
        # family="Courier New, monospace",
        size=12,
        color="white",
    ),
)
fig1.update_layout(plot_bgcolor='#097E99', paper_bgcolor='#097E99')
fig1.show()
# Patient-level records.
df_indi = pd.read_csv('IndividualDetails.csv')
df_indi.head()
# Discard rows that lack a status or an age, then renumber the rows from zero.
df_indi = df_indi.dropna(subset=['current_status', 'age']).reset_index(drop=True)
df_indi['current_status'].unique(), df_indi.shape
# One frame per outcome; they are stacked in the order Deceased, Recovered, Hospitalized.
patient_status = df_indi['current_status']
df1_indians = df_indi[patient_status == 'Deceased']
df3_indians = df_indi[patient_status == 'Hospitalized']
df2_indians = df_indi[patient_status == 'Recovered']
cdf = pd.concat([df1_indians, df2_indians, df3_indians])
# Age distribution for each outcome.
plt.figure(figsize=(12, 12))
age_boxes = sns.boxplot(x="current_status", y="age", data=cdf)
age_boxes.set_title("India's Outcome till now Age-Wise")
plt.show()
The hospitalized patients belonged to the age group 22 to 60. Recovered patients belonged to the age group 30 to 65. Deceased patients: all deceased patients were senior citizens except one adult aged around 40 years.
# Flag patients whose notes mention 'Travel'.
# The explicit == False / == True comparisons are kept on purpose: a row whose
# comparison result is neither (e.g. a missing note) lands in neither group.
mentions_travel = df_indi['notes'].str.contains('Travel')
pep_no_trav_his = df_indi[mentions_travel == False]
pep_with_trav_his = df_indi[mentions_travel == True]
df_indi['id'].nunique(), pep_no_trav_his['id'].nunique()
colors = ['#B5B200', '#1300B5']
# Share of distinct patient ids in each group, as a percentage of all distinct ids.
all_patient_ids = df_indi['id'].nunique()
negative = round(pep_no_trav_his['id'].nunique() / all_patient_ids * 100, 2)
positive = round(pep_with_trav_his['id'].nunique() / all_patient_ids * 100, 2)
fig = px.pie(
    pep_no_trav_his,
    values=[negative, positive],
    names=['Patients w/o Travel History', 'Patients with Travel History'],
    title='Patients with and without Travel History',
)
fig.show()
23% of the total patients who tested positive had no recent travel history (the majority of them were relatives, friends or people who directly or indirectly came in contact with patients with a travel history). However, 23% is a large enough share to indicate the risk of progression of the virus into Stage III (community spread).
#Data Processing
# Strip stray whitespace from the column names, then drop identifier/free-text columns.
individual_details = df_indi.rename(columns=lambda x: x.strip())
cols_to_drop = ['unique_id', 'id', 'government_id', 'detected_city_pt', 'notes',
                'current_location', 'current_location_pt', 'contacts']
filter_data = individual_details.drop(cols_to_drop, axis=1)
filter_data.head()
# Convert dates in one format
import datetime as dt
filter_data['status_change_date'] = pd.to_datetime(filter_data['status_change_date'])
filter_data['diagnosed_date'] = pd.to_datetime(filter_data['diagnosed_date'])
# Whole days between diagnosis and the latest status change.
filter_data['Duration of Any Status'] = filter_data['status_change_date'] - filter_data['diagnosed_date']
filter_data['Duration of Any Status'] = filter_data['Duration of Any Status'].dt.days
# Store both dates back as 'YYYY-MM-DD' text (done after the subtraction, which needs real datetimes).
filter_data['status_change_date'] = filter_data['status_change_date'].dt.strftime('%Y-%m-%d')
filter_data['diagnosed_date'] = filter_data['diagnosed_date'].dt.strftime('%Y-%m-%d')
filter_data.info()
#Dropping Detected City and District as there are values for State
drop_cols = ['detected_city', 'detected_district']
covid_india_df = filter_data.drop(drop_cols, axis=1)
covid_india_df.info()
covid_india_df.describe()
#Filling NAs in age with median
covid_india_df.describe()
covid_india_df['age'] = covid_india_df['age'].fillna(covid_india_df.age.median())
# Forward-fill missing statuses. Series.ffill() replaces fillna(method='ffill'),
# whose `method` argument is deprecated in current pandas.
covid_india_df['current_status'] = covid_india_df['current_status'].ffill()
covid_india_df.info()
covid_india_df.head()
# Status duration (in days) plotted against the patient's current status,
# to see whether the durations form visible groups.
plt.figure(figsize=(18, 9))
sns.scatterplot(data=covid_india_df, x='current_status', y='Duration of Any Status')
plt.xlabel('Status of the Patient')
plt.ylabel('Duration of Days from the time they were Admitted')
plt.title('Distribution of Duration of Days wioth the Status of patients!');
# Bucket ages into five 20-year bands and count patients per band, split by current status.
age_bins = [0, 20, 40, 60, 80, 100]
age_band = pd.cut(covid_india_df.age, age_bins)
plt.figure(figsize=(16, 6))
sns.countplot(
    x=age_band,
    hue=covid_india_df.current_status,
    palette=['#263A90', '#FFFF00', '#ee0a0a'],
)
plt.xticks(rotation=90)
plt.xlabel("Age Groups")
# Logarithmic count axis.
plt.yscale('log')
plt.title("Age Groups affected with Covid-19")
plt.grid(True)
plt.show()
People with Age beyond 40s were found to be more vulnerable.
It was observed that Senior Citizens were at potential risk as their Death rate was higher than Recovery Rate.
People with Age below 40 had higher recovery chances.
In age group of Children and young-adults, no deaths were reported.
The first COVID-19 case in India was reported on 30th January 2020, a student who arrived in Kerala state from Wuhan, China followed by 2 more cases in Kerala. For almost a month, no new cases were reported in India.
However, on 8th March 2020, five new cases of COVID-19 in Kerala were again reported and since then the cases had been rising affecting 22 states and resulting in 19 deaths across the country as of 28th March 2020.
On 13th March 2020, India reported its first coronavirus fatality in the state of Karnataka, followed by 3 more deaths in other states.
On 28th March 2020, confirmed COVID-19 cases had risen to 775 with the state of Maharashtra bagging the maximum number of cases.
As the number of confirmed cases increased in India, the question arose: "did those numbers represent the true number of cases in India?"
Three statistically determined variables that were used to obtain the estimation curve:
doubling_rate = 6. days
Mortality_rate = 0.01 or 1 %
days_to_death = 17 days
Doubling rate: It is the time it takes for the number of cases to double. For Coronavirus, it was found to be around 6 days on average. For now, the same rate is assumed for the sake of analysis.
Mortality rate: It is assumed to be around 1%. REASON: Countries that were well prepared had a fatality rate of ~0.5% (South Korea) to ~0.9% (rest of China). Countries that were under-prepared had a fatality rate between ~3% and 5%. In other words: countries that acted fast were able to reduce the number of deaths by a factor of ten. Acting fast includes massive testing and measures taken to enforce Lockdown/Social Distancing/Quarantine to reduce the rate of spread (Flatten the curve).
Days to death: It is statistically determined to be around 17 days. For now, the same value is assumed for the sake of analysis.
# Default inputs of the estimation model:
#   doubling_rate  - days for the case count to double
#   Mortality_rate - deaths as a fraction of cases (0.01 == 1 %)
#   days_to_death  - days used to step back from a reported death
doubling_rate, Mortality_rate, days_to_death = 6, 0.01, 17
from ipywidgets import widgets
# Display options shared by the three sliders: show the current value with one decimal
# and report changes continuously while dragging.
slider_readout = dict(readout=True, readout_format='.1f', continuous_update=True)
# Doubling rate, 2.0 to 8.0 in steps of 0.1, starting at 6.
DR_slider = widgets.FloatSlider(
    description='Doubling Rate:',
    value=6, min=2.0, max=8.0, step=0.1,
    **slider_readout
)
# Mortality rate in percent, 0.5 to 10.0 in steps of 0.1, starting at 1.0.
M_slider = widgets.FloatSlider(
    description='Mortality Rate %:',
    value=1.0, min=0.5, max=10.0, step=0.1,
    **slider_readout
)
# The slider holds a percentage; the model works with a fraction.
# The value is read once here; no change callback is registered in this section.
Mortality_rate = M_slider.value / 100
# Days to death, 10.0 to 60.0 in steps of 0.5, starting at 17.
Death_slider = widgets.FloatSlider(
    description='Days to Death:',
    value=17, min=10.0, max=60.0, step=0.5,
    **slider_readout
)
# Read once, like the mortality rate above.
days_to_death = Death_slider.value
# Estimate a "realistic" daily case count by projecting back from reported deaths.
# Inputs read from outside this section: covid_india, Mortality_rate, days_to_death,
# doubling_rate. Result: actual_total, one integer per day.
# NOTE(review): the original indentation of this cell is not visible here, so the
# nesting of the loops below must be confirmed before changing any logic.
death_df = covid_india.copy()
# Keep only the rows that report at least one death.
death_df_filtered = death_df[death_df.Deaths != 0]
### Finding states with deaths
states = death_df_filtered.States.unique().tolist()
#print("{}\n".format(states))
#### Number of days covered: from the Date of the first row of covid_india to the
#### Date of the last row of death_df, inclusive (originally noted as "since Jan 30 2020")
i = covid_india.iloc[[0]].Date.tolist()[0]
j = death_df.iloc[-1].Date
size = int((j-i).days)+1
actual_total = np.zeros(size, dtype = int) ## Running per-day total of the estimate, summed over states
actual_total_temp = np.zeros(size, dtype = int) ## Per-day scratch array that is added into actual_total and then zeroed
### Loop over each state (the ones with deaths)
# NOTE(review): this first loop repeats the opening statements of the loop that
# follows and its results are overwritten there - it looks like a truncated
# duplicate; confirm before removing.
# `count` is not read anywhere in this section.
count=0
for state in states:
temp = death_df_filtered[death_df_filtered.States == state]
#for death in temp
#deathi = temp.iloc[[0]].Deaths.tolist()[0]
a = temp.drop_duplicates(subset='Deaths', keep="first").Deaths.tolist()
b = np.diff(a)
deaths = np.concatenate(([a[0]], b), axis=0)
### Loop over each state (the ones with deaths)
count=0
for state in states:
temp = death_df_filtered[death_df_filtered.States == state]
#for death in temp
#deathi = temp.iloc[[0]].Deaths.tolist()[0]
# `a` keeps the first row for each distinct Deaths value; `deaths` is then the first
# value followed by the differences between consecutive distinct values.
a = temp.drop_duplicates(subset='Deaths', keep="first").Deaths.tolist()
b = np.diff(a)
deaths = np.concatenate(([a[0]], b), axis=0)
#print(deaths)
### Loop over each entry of `deaths`
# NOTE(review): `i` indexes `deaths` (built from de-duplicated rows) but is also used
# as a row position in `temp` (not de-duplicated) - verify the two line up.
for i in range(0, len(deaths)):
### Go back int(days_to_death) days from the Date of row i
start = temp.iloc[[i]].Date - timedelta(days=int(days_to_death))
# `start` becomes the number of days between that earlier date and the last Date in death_df.
start = int((death_df.iloc[-1].Date - start.tolist()[0]).days)
#print(start)
### Calculating the realistic cases for each day and stored in array for each state for each date
# Seed value: deaths divided by the mortality rate. The array is integer-typed,
# so the quotient is stored as an integer.
# NOTE(review): the "- 2" in the index is not explained by the code - TODO confirm.
actual_total_temp[size - start-2] = deaths[i]/Mortality_rate
# From the seed onward, every int(doubling_rate)-th entry is twice the entry one step earlier.
# NOTE(review): this loop reuses the name `i` of the enclosing index loop.
for i in range(size - start + int(doubling_rate) - 2, size, int(doubling_rate)):
actual_total_temp[i] = actual_total_temp[i-int(doubling_rate)]*2
#print("{}\n".format(actual_total_temp))
### Smoothing the curve for each array formed above
# Entries between two doubling points are filled by adding a constant step
# (value at the doubling point divided by doubling_rate, truncated) to the previous entry.
for i in range(size - start - 2, size-1, int(doubling_rate)):
smoother = int(actual_total_temp[i]/doubling_rate)
for j in range(i+1, i+int(doubling_rate)):
actual_total_temp[j] = actual_total_temp[j-1] + smoother
# Stop filling at the last valid index. Rebinding `i` does not change which value
# the `for i in range(...)` statement yields next.
if j == size-1:
i = size-1
break
print("{}\n".format(actual_total_temp))
### Adding the scratch array to actual_total, then resetting the scratch array to zeros
actual_total = np.add(actual_total,actual_total_temp)
actual_total_temp = actual_total_temp*0
#### Finally, actual_total is plotted next
display(DR_slider, M_slider, Death_slider)
# Reduce covid_india to one row per state: within each state the row with the highest
# confirmed count is sorted first and kept.
covid_india.sort_values(by=['States', 'Total Confirmed cases'], ascending=[True, False], inplace=True)
covid_india.drop_duplicates(subset='States', keep="first", inplace=True)
# Rank the states by confirmed cases and number the rows from 1.
covid_india.sort_values(by='Total Confirmed cases', ascending=False, inplace=True)
covid_india.index = range(1, len(covid_india) + 1)
confirmed_total = covid_india['Total Confirmed cases'].sum()
# The last entry of actual_total is the upper estimate; half of it is the lower estimate.
upper_estimate = actual_total[-1]
print('Total number of Confirmed COVID 2019 cases across India:', confirmed_total)
print('Estimation of realistic number of cases across India: {} - {} (Around {}-{}x Confirmed cases)'.format(
    int(upper_estimate / 2),
    upper_estimate,
    int(upper_estimate / 2 / confirmed_total),
    int(upper_estimate / confirmed_total),
))
print('\n')
print('Total number of Active COVID 2019 cases across India:', covid_india['Total Active cases'].sum())
print('Total number of Cured/Discharged/Migrated COVID 2019 cases across India:', covid_india['Cured/Discharged/Migrated'].sum())
print('Total number of Deaths due to COVID 2019 across India:', covid_india['Deaths'].sum())
print('Total number of States/UTs affected:', covid_india['States'].count())
# Day-by-day figures for India.
dbd_India = pd.read_excel('per_day_cases.xlsx', sheet_name='India')
# Reported series and the model estimate on one chart.
fig = go.Figure()
for column_name, draw_mode in (
    ('Total Cases', 'lines+markers'),
    ('Recovered', 'lines'),
    ('Active', 'lines'),
    ('Deaths', 'lines'),
):
    fig.add_trace(go.Scatter(x=dbd_India['Date'], y=dbd_India[column_name],
                             mode=draw_mode, name=column_name))
fig.add_trace(go.Scatter(x=dbd_India['Date'], y=actual_total,
                         mode='lines+markers', name='Estimate of real Total cases'))
fig.update_layout(title_text='Trend of Coronavirus Cases in India(Confirmed vs Realistic cases)', plot_bgcolor='rgb(250, 242, 242)')
fig.show()
# Deaths on their own chart.
fig = go.Figure()
fig.add_trace(go.Scatter(x=dbd_India['Date'], y=dbd_India['Deaths'],
                         mode='lines', name='Deaths'))
fig.update_layout(title_text='Deaths', plot_bgcolor='rgb(250, 242, 242)')
fig.show()
import plotly.express as px
# Daily new cases: reported against estimated.
# np.diff alone returns one value fewer than there are days, which pairs each
# difference with the previous day's date. Prepending 0 keeps one value per day,
# so every difference sits on the day it belongs to.
realistic_new_cases = np.diff(actual_total, prepend=0)
fig = go.Figure(data=[
    go.Bar(name="Confirmed cases", x=dbd_India.Date.tolist(), y=dbd_India['New Cases'].tolist()),
    go.Bar(name="Realistic", x=dbd_India.Date.tolist(), y=realistic_new_cases),
])
fig.update_layout(barmode='group')
fig.update_layout(title_text='New Coronavirus Cases in India per day', plot_bgcolor='rgb(250, 242, 242)')
fig.show()
Indeterminable variables of the model: The model used was not perfect; it only gave an idea of how many cases could actually be present.
There were a lot of additional variables to consider. For instance, age distribution in each country would also have an impact: Since mortality was observed much higher for older people, regions with aging population like in Europe would be harder hit on average than younger countries like India. There were more factors viz. weather - environmental, food and lifestyle habits. But it’s still unclear how this would impact transmission and fatality rates.
One more thing to consider was the number of tests per million people and number of hospital beds per 1000 people. In current situation, India ranks among the lowest in both. Many cases and deaths might go unreported especially in rural areas due to lack of testing and self treatment phenomenon that existed in India
Around 20% of cases require hospitalization, 5% of cases require the Intensive Care Unit (ICU), and around 2.5% require very intensive help, with items such as ventilators or ECMO (extra-corporeal oxygenation).
Social distancing, maintaining hygiene, and following the guidelines laid down in the public interest.
# Per-day sheets for the three countries, with missing cells replaced by 0.
india, italy, korea = (
    pd.read_excel('per_day_cases.xlsx', sheet_name=sheet).fillna(0)
    for sheet in ('India', 'Italy', 'Korea')
)
import seaborn as sns
plt.figure(figsize=[19, 14])
sns.set(style='darkgrid', font_scale=2)
plt.title("Comparative distribution of rise of total cases reported of COVID-19 from Italy, Korea , India")
# One line per country on the same axes.
for country_frame, line_options in (
    (india, dict(markers=True, dashes=False, label='India')),
    (korea, dict(color='orange', label='korea')),
    (italy, dict(color='red', label='Italy')),
):
    ax1 = sns.lineplot(x="Date", y="Total Cases", data=country_frame, **line_options)
ax1.set(xlabel='Date', ylabel='Number of Total Cases Reported')
plt.xticks(rotation=90)
plt.show()
South Korea recorded its first case on 20/01/2020 South Korea's plan of action worked well as they look to have contained the spread effectively.
Italy reported its first positive case on 29/01/2020. Since then, Italy's figures had been pounding.
First case was reported in India on 30/01/2020. India had contained the virus well until start of march, since then figures are rising exponentially. The starting phase is denoted in graph.
# Daily new cases for the three countries on one set of axes.
plt.figure(figsize=[22, 14])
sns.set(style='darkgrid', font_scale=2)
plt.xticks(rotation=90)
plt.title("Comparative distribution of rise of New Cases reported of COVID-19 from Italy, Korea , India")
for country_frame, line_options in (
    (india, dict(markers=True, dashes=False, label='India')),
    (korea, dict(color='orange', label='korea')),
    (italy, dict(color='red', label='Italy')),
):
    ax = sns.lineplot(x="Date", y="New Cases", data=country_frame, **line_options)
ax.set(xlabel='Date', ylabel='Number of New Cases Reported')
plt.show()
In South Korea, the rise in new cases was found to be declining since March. By the 3rd week of March, only a few new cases were recorded daily. South Korea controlled the havoc efficiently.
In contrast, Italy, despite its world-class healthcare system, failed to control the situation. Its inefficiency in managing the outbreak of COVID-19 was evident from the above graph, where the trend of 'number of new cases registered daily' suggested a rise throughout the period.
In India, the 'number of new cases registered daily' began to increase from the 3rd week of March, following which a lockdown was implemented in the country.
# Interactive version of the total-cases comparison: one trace per country.
fig = go.Figure()
# The India trace takes its dates from dbd_India and its counts from india.
# NOTE(review): both frames are read from the same 'India' sheet in this file - confirm they stay aligned.
for trace_dates, trace_totals, trace_name in (
    (dbd_India['Date'], india['Total Cases'], 'India'),
    (italy['Date'], italy['Total Cases'], 'Italy'),
    (korea['Date'], korea['Total Cases'], ' SKorea'),
):
    fig.add_trace(go.Scatter(x=trace_dates, y=trace_totals,
                             mode='lines+markers', name=trace_name))
fig.show()
From previous graph, it was seen that 'daily new cases' count was constantly rising in Italy. Hence, steep rise in total cases can be seen in Italy. By end of March, 69.17k people of Italy were infected. Since, 'daily new cases' count declined very fast in S.Korea, Total cases infected by Covid-19 were rising slowly.
india.head()
# India: total cases before (left panel) and after (right panel) the 100-case mark.
plt.figure(figsize=[28, 28])
india_days_past_100 = india['Days after surpassing 100 cases']
# Left panel: rows where the day counter is still 0.
plt.subplot(2, 2, 1)
ax1 = sns.lineplot(
    x="Date", y="Total Cases",
    data=india[india_days_past_100 == 0],
    markers=True, dashes=False,
    label='Rise in Total cases in India Before surpassing 100 cases Benchmark',
    color='green',
)
ax1.set(xlabel='Date', ylabel='Number of Total Cases Reported')
plt.xticks(rotation=90)
# Right panel: rows where the day counter is positive.
plt.subplot(2, 2, 2)
ax2 = sns.lineplot(
    x="Date", y="Total Cases",
    data=india[india_days_past_100 > 0],
    markers=True, dashes=False,
    label='Rise in Total cases in India After surpassing 100 cases Benchmark',
    color='red',
)
ax2.set(xlabel='Date', ylabel='Number of Total Cases Reported')
plt.xticks(rotation=90)
plt.show()
# Korea: total cases before (left panel) and after (right panel) the 100-case mark.
plt.figure(figsize=[28, 28])
korea_days_past_100 = korea['Days after surpassing 100 cases']
# Left panel: rows where the day counter is still 0.
plt.subplot(2, 2, 1)
ax1 = sns.lineplot(
    x="Date", y="Total Cases",
    data=korea[korea_days_past_100 == 0],
    markers=True, dashes=False,
    label='Rise in Total cases in Korea Before surpassing 100 cases Benchmark',
    color='green',
)
ax1.set(xlabel='Date', ylabel='Number of Total Cases Reported ')
plt.xticks(rotation=90)
# Right panel: rows where the day counter is positive.
plt.subplot(2, 2, 2)
ax2 = sns.lineplot(
    x="Date", y="Total Cases",
    data=korea[korea_days_past_100 > 0],
    markers=True, dashes=False,
    label='Rise in Total cases in korea After surpassing 100 cases Benchmark',
    color='red',
)
ax2.set(xlabel='Date', ylabel='Number of Total Cases Reported')
plt.xticks(rotation=90)
plt.show()
# Italy: total cases before (left panel) and after (right panel) the 100-case mark.
# The legend labels name Italy; they previously said "Korea" although the data is Italy's.
plt.figure(figsize=[34, 28])
# Left panel: rows where the day counter is still 0.
plt.subplot(2, 2, 1)
ax1 = sns.lineplot(x="Date", y="Total Cases", data=italy[italy['Days after surpassing 100 cases'] == 0],
                   markers=True, dashes=False,
                   label='Rise in Total cases in Italy Before surpassing 100 cases Benchmark', color='green')
ax1.set(xlabel='Date', ylabel='Number of Total Cases Reported ')
plt.xticks(rotation=90)
# Right panel: rows where the day counter is positive.
plt.subplot(2, 2, 2)
ax2 = sns.lineplot(x="Date", y="Total Cases", data=italy[italy['Days after surpassing 100 cases'] > 0],
                   markers=True, dashes=False,
                   label='Rise in Total cases in Italy After surpassing 100 cases Benchmark', color='red')
ax2.set(xlabel='Date', ylabel='Number of Total Cases Reported')
plt.xticks(rotation=90)
plt.show()
#Loading Global Data:
# Confirmed-case and death time series; in the code that follows, the columns from
# position 4 onward are treated as the per-date columns.
confirmed_df = pd.read_csv('time_series_covid_19_confirmed.csv')
deaths_df = pd.read_csv('time_series_covid_19_deaths.csv')
is_india_row = confirmed_df['Country/Region'] == 'India'
confirmed_df[is_india_row]
# Column labels of the confirmed table, reused to slice both tables.
cols = confirmed_df.keys()
Get all the dates for the outbreak
# Keep the columns from position 4 to the end. Both tables are sliced with the
# labels of the confirmed table.
confirmed = confirmed_df.loc[:, cols[4]:cols[-1]]
deaths = deaths_df.loc[:, cols[4]:cols[-1]]
confirmed
dates = confirmed.keys()
# One value per date column: cases and deaths summed over every row.
world_cases = [confirmed[day].sum() for day in dates]
total_deaths = [deaths[day].sum() for day in dates]
# calculate rates
mortality_rate = [
    day_deaths / day_cases
    for day_deaths, day_cases in zip(total_deaths, world_cases)
]
# India's series: the sum over the rows whose Country/Region is 'India'.
india_rows = confirmed_df[confirmed_df['Country/Region'] == 'India']
india_cases = [india_rows[day].sum() for day in dates]
def daily_increase(data):
    """Return the day-over-day change of a cumulative series.

    The first element of the result is the first value of ``data`` itself;
    every later element is the current value minus the previous one.

    Args:
        data: Any iterable of numbers (list, tuple, array, generator, ...).
            It is consumed in a single pass, so it does not need to support
            ``len()`` or indexing.

    Returns:
        A list with one entry per input value; empty when ``data`` is empty.
    """
    increases = []
    previous = None
    for position, value in enumerate(data):
        # The first value has no predecessor and is reported unchanged.
        increases.append(value if position == 0 else value - previous)
        previous = value
    return increases
# Day-over-day changes, computed while the case series are still plain lists.
world_daily_increase = daily_increase(world_cases)
india_daily_increase = daily_increase(india_cases)
# Model inputs are column vectors: one row per day, day index counted from 0.
days_since_1_22 = np.arange(len(dates)).reshape(-1, 1)
india_cases = np.array(india_cases).reshape(-1, 1)
total_deaths = np.array(total_deaths).reshape(-1, 1)
# The forecast axis extends the observed days by `days_in_future` extra days.
days_in_future = 30
future_forcast = np.arange(len(dates) + days_in_future).reshape(-1, 1)
# Observed part of the forecast axis; tied to days_in_future instead of a literal 30
# so the two cannot drift apart.
adjusted_dates = future_forcast[:-days_in_future]
# NOTE: after this import the name `datetime` refers to the module.
import datetime
start = '1/22/2020'
start_date = datetime.datetime.strptime(start, '%m/%d/%Y')
# One 'MM/DD/YYYY' label per entry of the forecast axis, starting at start_date.
future_forcast_dates = [
    (start_date + datetime.timedelta(days=day_offset)).strftime('%m/%d/%Y')
    for day_offset in range(len(future_forcast))
]
#Importing Libraries:
from sklearn.linear_model import LinearRegression, BayesianRidge
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error
# Hold out 28 % of the days as a test set, with a fixed random state.
seed = 1
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_1_22,
    india_cases,
    test_size=0.28,
    random_state=seed,
)
# Support-vector regression with a degree-6 polynomial kernel.
svm_confirmed = SVR(kernel='poly', degree=6, gamma=0.01, C=0.1, epsilon=1, shrinking=True)
svm_confirmed.fit(X_train_confirmed, y_train_confirmed)
# Predictions over the whole forecast axis (observed days plus the future days).
svm_pred = svm_confirmed.predict(future_forcast)
# check against testing data
svm_test_pred = svm_confirmed.predict(X_test_confirmed)
plt.plot(svm_test_pred)
plt.plot(y_test_confirmed)
for metric_label, metric in (('MAE:', mean_absolute_error), ('MSE:', mean_squared_error)):
    print(metric_label, metric(svm_test_pred, y_test_confirmed))
# transform our data for polynomial regression
# The degree-5 feature expansion is fitted once on the training days and then
# applied unchanged to the test days and to the forecast axis.
poly = PolynomialFeatures(degree=5)
poly_X_train_confirmed = poly.fit_transform(X_train_confirmed)
poly_X_test_confirmed = poly.transform(X_test_confirmed)
poly_future_forcast = poly.transform(future_forcast)
# polynomial regression
# `normalize` is no longer passed: the argument was removed from scikit-learn's
# linear models (1.2) and had no effect when fit_intercept=False.
linear_model = LinearRegression(fit_intercept=False)
linear_model.fit(poly_X_train_confirmed, y_train_confirmed)
test_linear_pred = linear_model.predict(poly_X_test_confirmed)
linear_pred = linear_model.predict(poly_future_forcast)
print('MAE:', mean_absolute_error(test_linear_pred, y_test_confirmed))
print('MSE:', mean_squared_error(test_linear_pred, y_test_confirmed))
print(linear_model.coef_)
plt.plot(test_linear_pred)
plt.plot(y_test_confirmed)
# bayesian ridge polynomial regression
# Candidate values for the randomized hyper-parameter search.
tol = [1e-4, 1e-3, 1e-2]
alpha_1 = [1e-7, 1e-6, 1e-5, 1e-4]
alpha_2 = [1e-7, 1e-6, 1e-5, 1e-4]
lambda_1 = [1e-7, 1e-6, 1e-5, 1e-4]
lambda_2 = [1e-7, 1e-6, 1e-5, 1e-4]
bayesian_grid = {'tol': tol, 'alpha_1': alpha_1, 'alpha_2': alpha_2, 'lambda_1': lambda_1, 'lambda_2': lambda_2}
# `normalize` is no longer passed: the argument was removed from scikit-learn's
# linear models (1.2) and had no effect when fit_intercept=False.
bayesian = BayesianRidge(fit_intercept=False)
# 40 random parameter combinations, 3-fold cross-validation, scored by negative MSE.
bayesian_search = RandomizedSearchCV(bayesian, bayesian_grid, scoring='neg_mean_squared_error', cv=3,
                                     return_train_score=True, n_jobs=-1, n_iter=40, verbose=1)
bayesian_search.fit(poly_X_train_confirmed, y_train_confirmed)
bayesian_search.best_params_
# The best estimator found by the search is used for test and future predictions.
bayesian_confirmed = bayesian_search.best_estimator_
test_bayesian_pred = bayesian_confirmed.predict(poly_X_test_confirmed)
bayesian_pred = bayesian_confirmed.predict(poly_future_forcast)
print('MAE:', mean_absolute_error(test_bayesian_pred, y_test_confirmed))
print('MSE:', mean_squared_error(test_bayesian_pred, y_test_confirmed))
plt.plot(y_test_confirmed)
plt.plot(test_bayesian_pred)
# Future predictions using SVM
print('SVM future predictions:')
# Pair each of the last 30 forecast dates with its rounded prediction.
# A list keeps the pairs in chronological order; a set would return them in arbitrary order.
svm_zip = list(zip(future_forcast_dates[-30:], np.round(svm_pred[-30:])))
svm_zip
# unzipping values
Date, Conf_cases = zip(*svm_zip)
Dates = list(Date)
Confirm_cases = list(Conf_cases)
print(Dates)
print(Confirm_cases)
# Forecast table; the column mapping is passed directly so the builtin `dict` is not shadowed.
svm_df = pd.DataFrame({'dates': Dates, 'cases': Confirm_cases})
svm_df.head()
plt.figure(figsize=[28, 28])
plt.subplot(2, 2, 1)
ax1 = sns.lineplot(x="dates", y="cases", data=svm_df, markers=True, dashes=True, label='SVM Forecast', color='red')
ax1.set(xlabel='Date', ylabel='Number of Cases Forecast')
plt.xticks(rotation=90)
The model predicted the total number of cases in India would reach 7700 by 27th April 2020.
# Future predictions using Polynomial Regression
# Flatten the predictions to one dimension before pairing them with dates.
linear_pred = linear_pred.reshape(1, -1)[0]
print('Polynomial regression future predictions:')
# Pair each of the last 30 forecast dates with its rounded prediction.
# A list keeps the pairs in chronological order; a set would return them in arbitrary order.
poly_zip = list(zip(future_forcast_dates[-30:], np.round(linear_pred[-30:])))
poly_zip
# unzipping values
Date, Conf_cases = zip(*poly_zip)
Dates = list(Date)
Confirm_cases = list(Conf_cases)
print(Dates)
print(Confirm_cases)
# Forecast table; the column mapping is passed directly so the builtin `dict` is not shadowed.
poly_df = pd.DataFrame({'dates': Dates, 'cases': Confirm_cases})
poly_df.head()
plt.figure(figsize=[28, 28])
plt.subplot(2, 2, 1)
ax1 = sns.lineplot(x="dates", y="cases", data=poly_df, markers=True, dashes=True, label='Ploy_reg Forecast', color='red')
ax1.set(xlabel='Date', ylabel='Number of Cases Forecast')
plt.xticks(rotation=90)
# Future predictions using Bayesian Ridge Regression
print('Ridge regression future predictions:')
# Pair each of the last 30 forecast dates with its rounded prediction.
# A list keeps the pairs in chronological order; a set would return them in arbitrary order.
bay_zip = list(zip(future_forcast_dates[-30:], np.round(bayesian_pred[-30:])))
bay_zip
# unzipping values
Date, Conf_cases = zip(*bay_zip)
Dates = list(Date)
Confirm_cases = list(Conf_cases)
print(Dates)
print(Confirm_cases)
# Forecast table; the column mapping is passed directly so the builtin `dict` is not shadowed.
bay_df = pd.DataFrame({'dates': Dates, 'cases': Confirm_cases})
bay_df.head()
plt.figure(figsize=[28, 28])
plt.subplot(2, 2, 1)
ax1 = sns.lineplot(x="dates", y="cases", data=bay_df, markers=True, dashes=True, label='Bay_Ridge_reg Forecast', color='red')
ax1.set(xlabel='Date', ylabel='Number of Cases Forecast')
plt.xticks(rotation=90)
In these model predictions, one major factor was not taken into consideration: India's move to a country-wide lockdown. So, the numbers are subject to change.
Data of First week of April would set the proper tone for future predictions.
Getting information about countries/regions that have confirmed coronavirus cases:
import operator
# Latest available column of each table.
latest_confirmed = confirmed_df[dates[-1]]
latest_deaths = deaths_df[dates[-1]]
# Latest confirmed total per country, in order of first appearance in the table.
country_column = confirmed_df['Country/Region']
latest_totals = [
    (country, latest_confirmed[country_column == country].sum())
    for country in country_column.unique()
]
# Countries whose latest total is not above zero are listed separately and left out of the ranking.
no_cases = [country for country, total in latest_totals if not total > 0]
# sort countries by the number of confirmed cases
ranked_totals = sorted(
    ((country, total) for country, total in latest_totals if total > 0),
    key=operator.itemgetter(1),
    reverse=True,
)
unique_countries = [country for country, _ in ranked_totals]
country_confirmed_cases = [total for _, total in ranked_totals]
# number of cases per country/region
print('Confirmed Cases by Countries/Regions:')
for country, total in zip(unique_countries, country_confirmed_cases):
    print(f'{country}: {total} cases')
COVID-19 is spreading with astonishing speed; COVID-19 outbreaks in any setting have very serious consequences; and there is now strong evidence that non-pharmaceutical interventions can reduce and even interrupt transmission. Concerningly, global and national preparedness planning is often ambivalent about such interventions. However, to reduce COVID-19 illness and death, near-term readiness planning must embrace the large-scale implementation of high-quality, non-pharmaceutical public health measures. These measures must fully incorporate immediate case detection and isolation, rigorous close contact tracing and monitoring/quarantine, and direct population/community engagement.